Celestin Apprentice 5

home *** CD-ROM | disk | FTP | other *** search

/ Celestin Apprentice 5 / Apprentice-Release5.iso / Source Code / C / Applications / Python 1.3.3 / Python 133 PPC / Demo / www / waislib.py < prev next >

Wrap

Text File | 1996-05-19 | 4KB | 171 lines

# WAIS client interface, based on the waisq model. # This accurately parses the waisq question file, so no surprises! # (XXX But the parser is too slow...) import waisqp import tempfile import os # The waisq program. XXX Rely on user's $PATH WAISQ = 'waisq -c /ufs/guido/src/wais/wais-sources/' # Question class. Normal usage: # >>> q = Question() # >>> q.set_seed_words('some interesting phrase') # >>> q.add_source('foobar.src') # >>> ... # >>> q.do_query() # >>> for x in q.get_result_summary(): print x # To print document number i (0 <= i < number-of-documents): # >>> q.write_document(i, sys.stdout) # or, e.g.: # >>> q.pipe_document(i, '/usr/ucb/more') # or: # >>> print q.get_document(i) # class Question: # def __init__(self): self.seed_words = '' self.sources = [] self.relevant_documents = waisqp.List() self.result_documents = waisqp.List() self.scratch = tempfile.mktemp() # def close(self): try: os.unlink(self.scratch) except os.error: pass # def set_seed_words(self, seed_words): self.seed_words = seed_words # def get_seed_words(self): return self.seed_words # def add_source(self, source): self.sources.append(source) # def reset_sources(self): self.sources = [] # def get_sources(self): return self.sources # def get_result_documents(self): return self.result_documents # def do_query(self): qrec = waisqp.Record('question') qrec['version'] = '2' qrec['seed-words'] = '"' + self.seed_words + '"' qrec['relevant-documents'] = self.relevant_documents slist = waisqp.List() for source in self.sources: s = waisqp.Record('source-id') s['filename'] = '"' + source + '"' slist.append(s) qrec['sources'] = slist file = self.scratch f = open(file, 'w') f.write(`qrec`) f.close() sts = os.system(WAISQ + ' -f ' + file + ' -g; echo 1>&2\n') if sts != 0: raise RuntimeError, 'waisq exit status ' + `sts` record = waisqp.parsefile(file) if record.gettype() <> 'question': raise RuntimeError, 'waisq did not write a question' self.result_documents = record['result-documents'] # def get_result_summary(self): summary = [] for doc in self.result_documents: score = doc['score'] doc = doc['document'] headline = doc['headline'] lines = doc['number-of-lines'] bytes = doc['number-of-bytes'] type = doc['type'] date = doc['date'] rec = score, headline, lines, bytes, type, date summary.append(rec) return summary # def get_document(self, i): if not 0 <= i < len(self.result_documents): raise IndexError, 'document number out of range' cmd = WAISQ + ' -f ' + self.scratch + ' -v ' + `i+1` + '\n' p = os.popen(cmd, 'r') data = p.read() sts = p.close() if sts: sys.stderr.write('waisq exit status ' + `sts` + '\n') return data # def pipe_document(self, i, backend): if not 0 <= i < len(self.result_documents): raise IndexError, 'document number out of range' cmd = WAISQ + ' -f ' + self.scratch + ' -v ' + `i+1` cmd = cmd + ' | ' + backend + '\n' sts = os.system(cmd) if sts: sys.stderr.write('pipe_document exit status ' + \ `sts` + '\n') # def write_document(self, i, f, *rest): if rest: if len(rest) > 1: raise TypeError, 'too many arguments' bufsize = rest[0] else: bufsize = 8192 if not 0 <= i < len(self.result_documents): raise IndexError, 'document number out of range' cmd = WAISQ + ' -f ' + self.scratch + ' -v ' + `i+1` + '\n' p = os.popen(cmd, 'r') while 1: buf = p.read(bufsize) if not buf: break f.write(buf) sts = p.close() if sts: sys.stderr.write('waisq exit status ' + `sts` + '\n') # Test program. # usage: python -c 'import waislib;waislib.test()' database word ... # def test(): import sys, string if len(sys.argv) < 3: sys.stderr.write('usage: test database word ...\n') sys.exit(2) db = sys.argv[1] if db[-4:] <> '.src': db = db + '.src' q = Question() try: q.add_source(db) q.set_seed_words(string.join(sys.argv[2:])) q.do_query() K = 1024 i = 0 summary = q.get_result_summary() for rec in summary: score, headline, lines, bytes, type, date = rec bytes = eval(bytes) print i+1, score, `(bytes+K-1)/K` + 'K', type, date, \ headline reply = raw_input('retrieve this document? [yn](n) ') if string.lower(string.strip(reply)[:1]) == 'y': if summary[i][4] == '"MIME"': q.pipe_document(i, 'metamail') else: q.pipe_document(i, '${PAGER-more}') i = i+1 finally: q.close()